# Load the annotated differential-expression table (tab-delimited, with a
# header row) for the 00 vs 02 comparison.
#   row.names = NULL    -> rows are numbered; no column is used as row names
#   comment.char = ""   -> '#' inside a field is kept as data, not a comment
#   check.names = FALSE -> column names are kept exactly as written in the file
# Argument names are spelled out in full (no partial matching) and FALSE is
# used instead of the reassignable shorthand F.
tmm_r <- read.delim(
  file = "mate_00vs02_tmm_filt_annot.txt",
  header = TRUE,
  row.names = NULL,
  comment.char = "",
  check.names = FALSE
)
# Preview the first rows to confirm the columns were parsed as expected.
head(tmm_r)
## locus_tag X00_mean X02_mean theta prob log2FC length.x
## 1 HVO_RS00090 57.83890 31.14313 0.6374613 0.9297680 0.8931261 653
## 2 HVO_RS00350 35.65117 58.33130 -0.3730532 0.7434937 -0.7103209 530
## 3 HVO_RS00570 62.16598 27.22173 0.9027281 0.9843431 1.1913668 1949
## 4 HVO_RS00635 45.59738 29.56472 0.4659552 0.8913868 0.6250740 863
## 5 HVO_RS00705 38.48181 51.32470 -0.2878642 0.6195012 -0.4154764 695
## 6 HVO_RS00755 2358.10620 1036.62999 1.6725932 0.9999403 1.1857277 413
## P wp dbxref ncbi_id
## 1 7.023204e-02 WP_004041134.1 8919107 lcl|NC_013964.1_cds_WP_004041134.1_106
## 2 2.565063e-01 WP_004041082.1 8919049 lcl|NC_013964.1_cds_WP_004041082.1_159
## 3 1.565688e-02 WP_004041038.1 8919024 lcl|NC_013964.1_cds_WP_004041038.1_203
## 4 1.086132e-01 WP_004041025.1 8919172 lcl|NC_013964.1_cds_WP_004041025.1_216
## 5 3.804988e-01 WP_004041011.1 8919253 lcl|NC_013964.1_cds_WP_004041011.1_230
## 6 5.971975e-05 WP_004041383.1 25143363 lcl|NC_013964.1_cds_WP_004041383.1_240
## genomic_accession feature start stop strand gene gbkey gene_biotype
## 1 NC_013964.1 gene 20051 20704 + 105 Gene protein_coding
## 2 NC_013964.1 gene 81961 82491 - 158 Gene protein_coding
## 3 NC_013964.1 gene 131561 133510 + 202 Gene protein_coding
## 4 NC_013964.1 gene 145516 146379 + 215 Gene protein_coding
## 5 NC_013964.1 gene 162589 163284 - 229 Gene protein_coding
## 6 NC_013964.1 gene 174818 175231 - 239 Gene protein_coding
## old_locus_tag length.y protein kegg1 kegg2 kegg3 pathway
## 1 HVO_B0020 653 bacterio-opsin activator <NA> <NA> <NA> <NA>
## 2 HVO_B0073 530 transcriptional regulator <NA> <NA> <NA> <NA>
## 3 HVO_B0118 1949 SMC-like protein Sph2 <NA> <NA> <NA> <NA>
## 4 HVO_B0131 863 hypothetical protein <NA> <NA> <NA> <NA>
## 5 HVO_B0145 695 creatininase <NA> <NA> <NA> <NA>
## 6 HVO_RS00755 413 response regulator <NA> <NA> <NA> <NA>
## kegg_prot kegg_id definition keggidprot
## 1 <NA> <NA> <NA> NA: NA
## 2 <NA> <NA> <NA> NA: NA
## 3 <NA> <NA> <NA> NA: NA
## 4 <NA> <NA> <NA> NA: NA
## 5 <NA> <NA> <NA> NA: NA
## 6 <NA> <NA> <NA> NA: NA
## Gene.Name Species EC_NUMBER
## 1 bacterio-opsin activator(HVO_RS00090) Haloferax volcanii DS2
## 2 transcriptional regulator(HVO_RS00350) Haloferax volcanii DS2
## 3 SMC-like protein Sph2(HVO_RS00570) Haloferax volcanii DS2
## 4 hypothetical protein(HVO_RS00635) Haloferax volcanii DS2
## 5 creatininase(HVO_RS00705) Haloferax volcanii DS2 3.5.2.10
## 6 response regulator(HVO_RS00755) Haloferax volcanii DS2
## GOTERM_BP_DIRECT GOTERM_CC_DIRECT
## 1
## 2 GO:0006351~transcription, DNA-templated GO:0005622~intracellular
## 3
## 4
## 5
## 6 GO:0000160~phosphorelay signal transduction system GO:0005622~intracellular
## GOTERM_MF_DIRECT
## 1
## 2 GO:0003700~transcription factor activity, sequence-specific DNA binding,GO:0043565~sequence-specific DNA binding
## 3
## 4
## 5 GO:0047789~creatininase activity
## 6 GO:0016301~kinase activity
## INTERPRO
## 1 IPR007050:Transcription regulator HTH, bacterioopsin
## 2 IPR000485:AsnC-type HTH domain,IPR011991:Winged helix-turn-helix DNA-binding domain,IPR019888:Transcription regulator AsnC-type
## 3 IPR027417:P-loop containing nucleoside triphosphate hydrolase
## 4 IPR011991:Winged helix-turn-helix DNA-binding domain
## 5 IPR003785:Creatininase/formamide hydrolase,IPR024087:Creatininase-like domain
## 6 IPR001789:Signal transduction response regulator, receiver domain,IPR011006:CheY-like superfamily
## KEGG_PATHWAY OFFICIAL_GENE_SYMBOL PFAM
## 1 HVO_RS00090 PF04967:HTH DNA binding domain
## 2 HVO_RS00350 PF13404:AsnC-type helix-turn-helix domain
## 3 HVO_RS00570
## 4 HVO_RS00635
## 5 HVO_RS00705 PF02633:Creatinine amidohydrolase
## 6 HVO_RS00755 PF00072:Response regulator receiver domain
## PIR_SUPERFAMILY SMART
## 1
## 2 SM00344:HTH_ASNC
## 3
## 4
## 5
## 6 SM00448:REC
## UP_KEYWORDS
## 1 Complete proteome,Plasmid,Reference proteome,
## 2 Complete proteome,DNA-binding,Plasmid,Reference proteome,Transcription,Transcription regulation,
## 3 Coiled coil,Complete proteome,
## 4 Complete proteome,Plasmid,Reference proteome,
## 5 Complete proteome,Hydrolase,Plasmid,Reference proteome,
## 6 Complete proteome,Kinase,Transferase,
## chr arCOG_name E.value score arCOG_familiy Gene_name
## 1 pHV3 arCOG02276 1.4e-21 75.5 T -
## 2 pHV3 arCOG01587 2.6e-64 213.6 K -
## 3 pHV3 arCOG00385 2.0e-08 32.2 S -
## 4 pHV3 arCOG01449 2.3e-66 221.6 V Csx1
## 5 pHV3 arCOG04536 2.6e-46 155.7 H ArfB
## 6 pHV3 arCOG02595 1.1e-33 113.2 T OmpR
## Description
## 1 Signal transduction regulator, containsPAS and HTH domains
## 2 DNA-binding transcriptional regulator, Lrp family
## 3 Uncharacterized coiled-coil protein
## 4 CARF domain containing protein
## 5 Creatinine amidohydrolase/Fe(II)-dependent formamide hydrolase involved in riboflavin and F420 biosynthesis
## 6 REC domain
## HVO EFFECTSIZE
## 1 HVO_B0020 0.8931261
## 2 HVO_B0073 -0.7103209
## 3 HVO_B0118 1.1913668
## 4 HVO_B0131 0.6250740
## 5 HVO_B0145 -0.4154764
## 6 --- 1.1857277
# Wrap the table in a volcanor object: column P holds the p-values,
# EFFECTSIZE the effect sizes and locus_tag the per-point identifier;
# protein, old_locus_tag and arCOG_name are passed as the gene and
# annotation columns.
volc_obj <- volcanor(
  tmm_r,
  p = "P",
  effect_size = "EFFECTSIZE",
  snp = "locus_tag",
  gene = "protein",
  annotation1 = "old_locus_tag",
  annotation2 = "arCOG_name"
)

# Volcano plot with orange effect-size guides at -3 and 3 and a green
# significance line at -log10(0.01).
volc_plot <- volcanoly(
  volc_obj,
  title = "t00 vs t02",
  effect_size_line = c(-3, 3),
  effect_size_line_color = "orange",
  genomewideline = -log10(0.01),
  genomewideline_color = "green"
)

# Same plot, but every locus tag in the table is passed to `highlight` and
# drawn in black.
volc_plot_fixed <- volcanoly(
  volc_obj,
  title = "t00 vs t02",
  effect_size_line = c(-3, 3),
  effect_size_line_color = "orange",
  genomewideline = -log10(0.01),
  genomewideline_color = "green",
  highlight = tmm_r$locus_tag,
  highlight_color = "black"
)

# Unassigned call with the same settings as volc_plot; at top level the
# result is auto-printed.
volcanoly(
  volc_obj,
  title = "t00 vs t02",
  effect_size_line = c(-3, 3),
  effect_size_line_color = "orange",
  genomewideline = -log10(0.01),
  genomewideline_color = "green"
)
The volcano plot has no annotations for non-significant points.
# Display the stored volcano plot built without explicit highlighting.
print(volc_plot)

# Unassigned call that passes every locus tag to `highlight` (drawn in
# black); at top level the result is auto-printed.
volcanoly(
  volc_obj,
  title = "t00 vs t02",
  effect_size_line = c(-3, 3),
  effect_size_line_color = "orange",
  genomewideline = -log10(0.01),
  genomewideline_color = "green",
  highlight = tmm_r$locus_tag,
  highlight_color = "black"
)
Annotations are now present for all points, but the significant points are no longer highlighted separately.
# Display the stored volcano plot in which every locus tag was passed to
# `highlight`.
print(volc_plot_fixed)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Volcano plot of tmm_r: log2 fold change (x) against -log10(P) (y).
#
# The `text` aesthetic is not drawn by geom_point() (ggplot2 warns that it is
# an unknown aesthetic); it is mapped so that the string can be used as the
# hover tooltip when the plot is converted with ggplotly(tooltip = "text").
p <- ggplot(tmm_r) +
  geom_point(
    aes(
      x = log2FC,
      y = -log10(P),
      # Same tooltip string as before: fields joined by " \n ".
      text = paste(locus_tag, protein, arCOG_name, old_locus_tag, sep = " \n ")
    ),
    color = "black", size = 2
  ) +
  # Disabled experimental layers, kept for reference:
  # geom_point(data = tmm_r, aes(x = log2FC, y = P), color = "lightgreen", cex = 3) +
  # geom_point(data = dfm1, aes(x = logFC.x, y = logFDR.x), color = "blue", cex = 3) +
  # geom_text(data = dfm1, aes(x = logFC.x, y = logFDR.x, label = gene), hjust = 1, vjust = 2) +
  theme_bw() +
  xlab("log(2) fold change") +
  # The y axis shows -log10(P), so the label says exactly that.
  ylab("-log10(P)") +
  # Fold-change guides at log2FC = -2 and 2.
  # `size` (rather than `linewidth`) is kept for compatibility with ggplot2
  # releases older than 3.4.
  geom_vline(xintercept = c(-2, 2), colour = "red", linetype = "dashed", size = 0.5) +
  # Significance guide at P = 0.05. The threshold must be transformed to the
  # -log10 scale of the y axis; a line at the raw value 0.05 would sit just
  # above zero and mark P ~ 0.89 instead.
  geom_hline(yintercept = -log10(0.05), colour = "red", linetype = "dashed", size = 0.5)
## Warning: Ignoring unknown aesthetics: text
# Auto-print the static ggplot2 version of the plot.
p
# Convert the plot to an interactive plotly widget; tooltip = "text" limits
# the hover label to the string mapped to the `text` aesthetic.
ggplotly(p, tooltip = "text")